#!/usr/bin/env bash
set -euo pipefail

# Serve Qwen3-4B-Instruct-2507 through vLLM with a domain LoRA adapter attached.
# Paths/port default to the original hard-coded values but can be overridden via
# environment variables (BASE_MODEL, LORA_PATH, PORT) without editing this file.
BASE_MODEL="${BASE_MODEL:-/home/renchong/.cache/modelscope/hub/Qwen/Qwen3-4B-Instruct-2507}"
LORA_PATH="${LORA_PATH:-/home/renchong/workspace/LLaMA-Factory/saves/Qwen3-4B-Instruct-2507/lora/Qwen3-4B-Instruct-2507_domain_v3}"
PORT="${PORT:-10085}"

vllm serve "$BASE_MODEL" \
  --max-model-len 1024 --port "$PORT" \
  --served-model-name nlu_model \
  --gpu-memory-utilization 0.8 \
  --enable-lora \
  --lora-modules "lora_domain=$LORA_PATH"

#/home/renchong/workspace/LLaMA-Factory/saves/Qwen3-4B-Instruct-2507/lora/Qwen3-4B-Instruct-2507_domain_v2
#	--chat-template ./qwen3_nonthinking_domain.jinja
# lora_domain_v1=/home/renchong/workspace/LLaMA-Factory/saves/Qwen3-0.6B-Instruct/lora/train_2025-07-29-18-03-11 
# --lora-modules lora_domain=/home/renchong/workspace/LLaMA-Factory/saves/Qwen3-8B-Instruct-AWQ/lora/train_2025-07-31-19-06-13 \